cd ../..
/Users/shanekercheval/repos/data-science-template
%run "source/config/notebook_settings.py"
import os
import mlflow
from mlflow.tracking import MlflowClient
from helpsk.utility import read_pickle
import helpsk as hlp
from source.library.utilities import Timer, log_info, get_config
# Load project configuration and connect an MLflow tracking client.
config = get_config()
mlflow_uri = config['MLFLOW']['URI']
log_info(f"MLFlow URI: {mlflow_uri}")
# Use the configured URI rather than a hard-coded address so the notebook
# keeps working if the tracking server moves (the literal duplicated the
# config value and would silently diverge from it).
client = MlflowClient(tracking_uri=mlflow_uri)
2022-06-10 14:49:52 - INFO | MLFlow URI: http://127.0.0.1:1234
# Find the most recent run of the credit experiment and pull its artifacts.
credit_experiment = client.get_experiment_by_name(name=config['MLFLOW']['EXPERIMENT_NAME'])
runs = client.list_run_infos(experiment_id=credit_experiment.experiment_id)
# Built-in `max` with a key is equivalent to `runs[np.argmax(...)]` (first
# maximum wins in both) and does not depend on numpy being in scope here.
latest_run = max(runs, key=lambda run: run.start_time)
yaml_path = client.download_artifacts(run_id=latest_run.run_id, path='experiment.yaml')
results = hlp.sklearn_eval.MLExperimentResults.from_yaml_file(yaml_file_name=yaml_path)
# NOTE(review): unpickling executes arbitrary code — only load artifacts
# from runs/servers you control.
best_estimator = read_pickle(client.download_artifacts(
    run_id=latest_run.run_id,
    path='experiment_best_estimator.pkl'
))
best_estimator
Pipeline(steps=[('prep',
ColumnTransformer(transformers=[('numeric',
Pipeline(steps=[('imputer',
TransformerChooser(transformer=SimpleImputer())),
('scaler',
TransformerChooser()),
('pca',
TransformerChooser())]),
['duration', 'credit_amount',
'installment_commitment',
'residence_since', 'age',
'existing_credits',
'num_dependents']),
('non_numeric',
Pipeline(steps...,
TransformerChooser(transformer=OneHotEncoder(handle_unknown='ignore')))]),
['checking_status',
'credit_history', 'purpose',
'savings_status',
'employment',
'personal_status',
'other_parties',
'property_magnitude',
'other_payment_plans',
'housing', 'job',
'own_telephone',
'foreign_worker'])])),
('model',
RandomForestClassifier(n_estimators=500, random_state=42))])In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. Pipeline(steps=[('prep',
ColumnTransformer(transformers=[('numeric',
Pipeline(steps=[('imputer',
TransformerChooser(transformer=SimpleImputer())),
('scaler',
TransformerChooser()),
('pca',
TransformerChooser())]),
['duration', 'credit_amount',
'installment_commitment',
'residence_since', 'age',
'existing_credits',
'num_dependents']),
('non_numeric',
Pipeline(steps...,
TransformerChooser(transformer=OneHotEncoder(handle_unknown='ignore')))]),
['checking_status',
'credit_history', 'purpose',
'savings_status',
'employment',
'personal_status',
'other_parties',
'property_magnitude',
'other_payment_plans',
'housing', 'job',
'own_telephone',
'foreign_worker'])])),
('model',
RandomForestClassifier(n_estimators=500, random_state=42))])ColumnTransformer(transformers=[('numeric',
Pipeline(steps=[('imputer',
TransformerChooser(transformer=SimpleImputer())),
('scaler',
TransformerChooser()),
('pca',
TransformerChooser())]),
['duration', 'credit_amount',
'installment_commitment', 'residence_since',
'age', 'existing_credits',
'num_dependents']),
('non_numeric',
Pipeline(steps=[('encoder',
TransformerChooser(transformer=OneHotEncoder(handle_unknown='ignore')))]),
['checking_status', 'credit_history',
'purpose', 'savings_status', 'employment',
'personal_status', 'other_parties',
'property_magnitude', 'other_payment_plans',
'housing', 'job', 'own_telephone',
'foreign_worker'])])['duration', 'credit_amount', 'installment_commitment', 'residence_since', 'age', 'existing_credits', 'num_dependents']
TransformerChooser(transformer=SimpleImputer())
SimpleImputer()
SimpleImputer()
TransformerChooser()
TransformerChooser()
['checking_status', 'credit_history', 'purpose', 'savings_status', 'employment', 'personal_status', 'other_parties', 'property_magnitude', 'other_payment_plans', 'housing', 'job', 'own_telephone', 'foreign_worker']
TransformerChooser(transformer=OneHotEncoder(handle_unknown='ignore'))
OneHotEncoder(handle_unknown='ignore')
OneHotEncoder(handle_unknown='ignore')
RandomForestClassifier(n_estimators=500, random_state=42)
# `download_artifacts` returns the local filesystem path of the artifact
# (shown in the output below) — displayed here to confirm where it lands.
client.download_artifacts(run_id=latest_run.run_id, path='x_train.pkl')
'/Users/shanekercheval/repos/data-science-template/mlflow-artifact-root/1/fc34554fdaeb406f9123563a6f2a22d7/artifacts/x_train.pkl'
with Timer("Loading training/test datasets"):
    # `pd.read_pickle` is the public API; `pd.pandas.read_pickle` only
    # worked because the pandas package exposes itself as an attribute.
    def _load_artifact(name):
        """Download one pickled dataset artifact from the latest run and load it."""
        return pd.read_pickle(client.download_artifacts(run_id=latest_run.run_id, path=name))

    X_train = _load_artifact('x_train.pkl')
    X_test = _load_artifact('x_test.pkl')
    y_train = _load_artifact('y_train.pkl')
    y_test = _load_artifact('y_test.pkl')
2022-06-10 14:49:52 - INFO | *****Timer Started: Loading training/test datasets 2022-06-10 14:49:52 - INFO | *****Timer Finished (0.00 seconds)
# Sanity-check that feature matrices and label vectors have matching row
# counts for both the train and test splits.
log_info(X_train.shape)
log_info(len(y_train))
log_info(X_test.shape)
log_info(len(y_test))
2022-06-10 14:49:52 - INFO | (800, 20) 2022-06-10 14:49:52 - INFO | 800 2022-06-10 14:49:52 - INFO | (200, 20) 2022-06-10 14:49:52 - INFO | 200
# Class labels and their raw counts in the training set.
np.unique(y_train, return_counts=True)
(array([0, 1]), array([546, 254]))
# Class proportions in the training set. Compute the counts once instead
# of calling `np.unique` twice over the same data.
train_counts = np.unique(y_train, return_counts=True)[1]
train_counts / np.sum(train_counts)
array([0.6825, 0.3175])
# Class proportions in the test set (counts computed once, not twice).
test_counts = np.unique(y_test, return_counts=True)[1]
test_counts / np.sum(test_counts)
array([0.77, 0.23])
# Best cross-validated score (roc_auc, per the tables below) from the search.
log_info(f"Best Score: {results.best_score}")
2022-06-10 14:49:52 - INFO | Best Score: 0.7759520520207864
# Hyper-parameters of the best trial found by the search.
log_info(f"Best Params: {results.best_params}")
2022-06-10 14:49:52 - INFO | Best Params: {'model': 'RandomForestClassifier()', 'imputer': 'SimpleImputer()', 'scaler': 'None', 'pca': 'None', 'encoder': 'OneHotEncoder()'}
# Show the single best trial (highest mean ROC-AUC) for each model type.
df = results.to_formatted_dataframe(return_style=False, include_rank=True)
# Rank trials within each model group; method="first" breaks ties by order.
within_model_rank = df.groupby("model")["roc_auc Mean"].rank(method="first", ascending=False)
df["model_rank"] = within_model_rank
df[df["model_rank"] == 1]
| rank | roc_auc Mean | roc_auc 95CI.LO | roc_auc 95CI.HI | model | C | max_features | max_depth | n_estimators | min_samples_split | min_samples_leaf | max_samples | criterion | learning_rate | min_child_weight | subsample | colsample_bytree | colsample_bylevel | reg_alpha | reg_lambda | imputer | scaler | pca | encoder | model_rank | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 10 | 1 | 0.78 | 0.75 | 0.80 | RandomForestClassifier() | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | SimpleImputer() | None | None | OneHotEncoder() | 1.00 |
| 5 | 2 | 0.77 | 0.77 | 0.78 | ExtraTreesClassifier() | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | SimpleImputer() | None | None | OneHotEncoder() | 1.00 |
| 0 | 3 | 0.77 | 0.75 | 0.80 | LogisticRegression() | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | SimpleImputer() | StandardScaler() | None | OneHotEncoder() | 1.00 |
| 19 | 7 | 0.76 | 0.72 | 0.80 | XGBClassifier() | NaN | NaN | 2.00 | 1095.00 | NaN | NaN | NaN | NaN | 0.04 | 6.00 | 0.88 | 0.75 | 0.56 | 0.04 | 1.09 | SimpleImputer(strategy='median') | None | None | OneHotEncoder() | 1.00 |
# Styled table of all trials, ranked by mean ROC-AUC.
results.to_formatted_dataframe(return_style=True,
include_rank=True,
num_rows=500)
| rank | roc_auc Mean | roc_auc 95CI.LO | roc_auc 95CI.HI | model | C | max_features | max_depth | n_estimators | min_samples_split | min_samples_leaf | max_samples | criterion | learning_rate | min_child_weight | subsample | colsample_bytree | colsample_bylevel | reg_alpha | reg_lambda | imputer | scaler | pca | encoder |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 0.776 | 0.753 | 0.799 | RandomForestClassifier() | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer() | None | None | OneHotEncoder() |
| 2 | 0.773 | 0.769 | 0.776 | ExtraTreesClassifier() | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer() | None | None | OneHotEncoder() |
| 3 | 0.772 | 0.747 | 0.797 | LogisticRegression() | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer() | StandardScaler() | None | OneHotEncoder() |
| 4 | 0.772 | 0.751 | 0.794 | ExtraTreesClassifier() | <NA> | 0.776 | 55.000 | 1,390.000 | 17.000 | 5.000 | 0.556 | entropy | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer() | None | PCA('mle') | CustomOrdinalEncoder() |
| 5 | 0.764 | 0.716 | 0.812 | ExtraTreesClassifier() | <NA> | 0.077 | 49.000 | 1,699.000 | 38.000 | 11.000 | 0.605 | entropy | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer(strategy='median') | None | None | CustomOrdinalEncoder() |
| 6 | 0.759 | 0.706 | 0.811 | RandomForestClassifier() | <NA> | 0.220 | 4.000 | 1,656.000 | 27.000 | 6.000 | 0.916 | gini | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer(strategy='most_frequent') | None | None | OneHotEncoder() |
| 7 | 0.758 | 0.719 | 0.796 | XGBClassifier() | <NA> | <NA> | 2.000 | 1,095.000 | <NA> | <NA> | <NA> | <NA> | 0.043 | 6.000 | 0.875 | 0.748 | 0.561 | 0.044 | 1.086 | SimpleImputer(strategy='median') | None | None | OneHotEncoder() |
| 8 | 0.752 | 0.707 | 0.797 | RandomForestClassifier() | <NA> | 0.424 | 17.000 | 538.000 | 19.000 | 19.000 | 0.725 | entropy | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer() | None | PCA('mle') | CustomOrdinalEncoder() |
| 9 | 0.750 | 0.688 | 0.812 | RandomForestClassifier() | <NA> | 0.637 | 99.000 | 1,218.000 | 43.000 | 15.000 | 0.662 | gini | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer(strategy='most_frequent') | None | PCA('mle') | CustomOrdinalEncoder() |
| 10 | 0.749 | 0.734 | 0.764 | XGBClassifier() | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer() | None | None | OneHotEncoder() |
| 11 | 0.740 | 0.702 | 0.779 | ExtraTreesClassifier() | <NA> | 0.770 | 13.000 | 739.000 | 29.000 | 27.000 | 0.725 | gini | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer() | None | PCA('mle') | CustomOrdinalEncoder() |
| 12 | 0.739 | 0.666 | 0.812 | RandomForestClassifier() | <NA> | 0.743 | 82.000 | 1,323.000 | 19.000 | 31.000 | 0.588 | entropy | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer() | None | None | CustomOrdinalEncoder() |
| 13 | 0.737 | 0.697 | 0.777 | ExtraTreesClassifier() | <NA> | 0.778 | 24.000 | 1,816.000 | 7.000 | 45.000 | 0.768 | gini | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer(strategy='most_frequent') | None | None | CustomOrdinalEncoder() |
| 14 | 0.734 | 0.699 | 0.769 | LogisticRegression() | 0.019 | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer(strategy='median') | StandardScaler() | None | CustomOrdinalEncoder() |
| 15 | 0.730 | 0.712 | 0.749 | LogisticRegression() | 0.339 | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer(strategy='median') | MinMaxScaler() | PCA('mle') | CustomOrdinalEncoder() |
| 16 | 0.730 | 0.599 | 0.860 | LogisticRegression() | 0.000 | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer(strategy='median') | StandardScaler() | None | OneHotEncoder() |
| 17 | 0.725 | 0.665 | 0.785 | XGBClassifier() | <NA> | <NA> | 3.000 | 1,237.000 | <NA> | <NA> | <NA> | <NA> | 0.224 | 8.000 | 0.737 | 0.682 | 0.917 | 0.157 | 1.278 | SimpleImputer(strategy='most_frequent') | None | None | OneHotEncoder() |
| 18 | 0.724 | 0.691 | 0.756 | XGBClassifier() | <NA> | <NA> | 2.000 | 1,891.000 | <NA> | <NA> | <NA> | <NA> | 0.073 | 5.000 | 0.677 | 0.913 | 0.792 | 0.001 | 1.994 | SimpleImputer(strategy='median') | None | PCA('mle') | OneHotEncoder() |
| 19 | 0.711 | 0.667 | 0.755 | XGBClassifier() | <NA> | <NA> | 2.000 | 1,618.000 | <NA> | <NA> | <NA> | <NA> | 0.070 | 19.000 | 0.714 | 0.880 | 0.665 | 0.127 | 1.638 | SimpleImputer(strategy='median') | None | PCA('mle') | OneHotEncoder() |
| 20 | 0.710 | 0.684 | 0.735 | LogisticRegression() | 0.002 | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer(strategy='median') | MinMaxScaler() | None | CustomOrdinalEncoder() |
# Trials restricted to RandomForestClassifier only.
results.to_formatted_dataframe(query='model == "RandomForestClassifier()"', include_rank=True)
| rank | roc_auc Mean | roc_auc 95CI.LO | roc_auc 95CI.HI | max_features | max_depth | n_estimators | min_samples_split | min_samples_leaf | max_samples | criterion | imputer | pca | encoder |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 0.776 | 0.753 | 0.799 | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer() | None | OneHotEncoder() |
| 2 | 0.759 | 0.706 | 0.811 | 0.220 | 4.000 | 1,656.000 | 27.000 | 6.000 | 0.916 | gini | SimpleImputer(strategy='most_frequent') | None | OneHotEncoder() |
| 3 | 0.752 | 0.707 | 0.797 | 0.424 | 17.000 | 538.000 | 19.000 | 19.000 | 0.725 | entropy | SimpleImputer() | PCA('mle') | CustomOrdinalEncoder() |
| 4 | 0.750 | 0.688 | 0.812 | 0.637 | 99.000 | 1,218.000 | 43.000 | 15.000 | 0.662 | gini | SimpleImputer(strategy='most_frequent') | PCA('mle') | CustomOrdinalEncoder() |
| 5 | 0.739 | 0.666 | 0.812 | 0.743 | 82.000 | 1,323.000 | 19.000 | 31.000 | 0.588 | entropy | SimpleImputer() | None | CustomOrdinalEncoder() |
# Trials restricted to LogisticRegression only.
results.to_formatted_dataframe(query='model == "LogisticRegression()"', include_rank=True)
| rank | roc_auc Mean | roc_auc 95CI.LO | roc_auc 95CI.HI | C | imputer | scaler | pca | encoder |
|---|---|---|---|---|---|---|---|---|
| 1 | 0.772 | 0.747 | 0.797 | <NA> | SimpleImputer() | StandardScaler() | None | OneHotEncoder() |
| 2 | 0.734 | 0.699 | 0.769 | 0.019 | SimpleImputer(strategy='median') | StandardScaler() | None | CustomOrdinalEncoder() |
| 3 | 0.730 | 0.712 | 0.749 | 0.339 | SimpleImputer(strategy='median') | MinMaxScaler() | PCA('mle') | CustomOrdinalEncoder() |
| 4 | 0.730 | 0.599 | 0.860 | 0.000 | SimpleImputer(strategy='median') | StandardScaler() | None | OneHotEncoder() |
| 5 | 0.710 | 0.684 | 0.735 | 0.002 | SimpleImputer(strategy='median') | MinMaxScaler() | None | CustomOrdinalEncoder() |
# Visual diagnostics of the hyper-parameter search: score over trial order,
# overall and for the winning model type.
results.plot_performance_across_trials(facet_by='model').show()
results.plot_performance_across_trials(query='model == "RandomForestClassifier()"').show()
results.plot_parameter_values_across_trials(query='model == "RandomForestClassifier()"').show()
# results.plot_scatter_matrix(query='model == "RandomForestClassifier()"',
# height=1000, width=1000).show()
# Score vs. each numeric hyper-parameter (lowess smoothing emits a harmless
# RuntimeWarning below when a parameter has too few distinct values).
results.plot_performance_numeric_params(query='model == "RandomForestClassifier()"',
height=800)
/Users/shanekercheval/repos/data-science-template/.venv/lib/python3.9/site-packages/statsmodels/nonparametric/smoothers_lowess.py:227: RuntimeWarning: invalid value encountered in true_divide
# Parallel-coordinates and categorical-parameter views for the random forest,
# then score vs. max_features with max_depth/encoder encoded as size/color.
results.plot_parallel_coordinates(query='model == "RandomForestClassifier()"').show()
results.plot_performance_non_numeric_params(query='model == "RandomForestClassifier()"').show()
results.plot_score_vs_parameter(
query='model == "RandomForestClassifier()"',
parameter='max_features',
size='max_depth',
color='encoder',
)
# results.plot_parameter_vs_parameter(
# query='model == "XGBClassifier()"',
# parameter_x='colsample_bytree',
# parameter_y='learning_rate',
# size='max_depth'
# )
# results.plot_parameter_vs_parameter(
# query='model == "XGBClassifier()"',
# parameter_x='colsample_bytree',
# parameter_y='learning_rate',
# size='imputer'
# )
# Predicted probability of the positive class (column 1) on the hold-out set.
test_predictions = best_estimator.predict_proba(X_test)[:, 1]
test_predictions[0:10]
array([0.13 , 0.522, 0.456, 0.554, 0.066, 0.294, 0.054, 0.302, 0.056,
0.306])
# Evaluate the hold-out predictions at a fixed classification threshold.
# NOTE(review): 0.37 (vs. the usual 0.5) appears hand-chosen — presumably
# from the threshold/precision-recall curves below; confirm the rationale.
evaluator = hlp.sklearn_eval.TwoClassEvaluator(
actual_values=y_test,
predicted_scores=test_predictions,
score_threshold=0.37
)
evaluator.plot_actual_vs_predict_histogram()
evaluator.plot_confusion_matrix()
# Full metric table, benchmarked against two dummy baselines.
evaluator.all_metrics_df(return_style=True,
dummy_classifier_strategy=['prior', 'constant'],
round_by=3)
| Score | Dummy (prior) | Dummy (constant) | Explanation | |
|---|---|---|---|---|
| AUC | 0.785 | 0.500 | 0.500 | Area under the ROC curve (true pos. rate vs false pos. rate); ranges from 0.5 (purely random classifier) to 1.0 (perfect classifier) |
| True Positive Rate | 0.804 | 0.000 | 1.000 | 80.4% of positive instances were correctly identified.; i.e. 37 "Positive Class" labels were correctly identified out of 46 instances; a.k.a Sensitivity/Recall |
| True Negative Rate | 0.682 | 1.000 | 0.000 | 68.2% of negative instances were correctly identified.; i.e. 105 "Negative Class" labels were correctly identified out of 154 instances |
| False Positive Rate | 0.318 | 0.000 | 1.000 | 31.8% of negative instances were incorrectly identified as positive; i.e. 49 "Negative Class" labels were incorrectly identified as "Positive Class", out of 154 instances |
| False Negative Rate | 0.196 | 1.000 | 0.000 | 19.6% of positive instances were incorrectly identified as negative; i.e. 9 "Positive Class" labels were incorrectly identified as "Negative Class", out of 46 instances |
| Positive Predictive Value | 0.430 | 0.000 | 0.230 | When the model claims an instance is positive, it is correct 43.0% of the time; i.e. out of the 86 times the model predicted "Positive Class", it was correct 37 times; a.k.a precision |
| Negative Predictive Value | 0.921 | 0.770 | 0.000 | When the model claims an instance is negative, it is correct 92.1% of the time; i.e. out of the 114 times the model predicted "Negative Class", it was correct 105 times |
| F1 Score | 0.561 | 0.000 | 0.374 | The F1 score can be interpreted as a weighted average of the precision and recall, where an F1 score reaches its best value at 1 and worst score at 0. |
| Precision/Recall AUC | 0.512 | 0.230 | 0.230 | Precision/Recall AUC is calculated with `average_precision` which summarizes a precision-recall curve as the weighted mean of precisions achieved at each threshold. See sci-kit learn documentation for caveats. |
| Accuracy | 0.710 | 0.770 | 0.230 | 71.0% of instances were correctly identified |
| Error Rate | 0.290 | 0.230 | 0.770 | 29.0% of instances were incorrectly identified |
| % Positive | 0.230 | 0.230 | 0.230 | 23.0% of the data are positive; i.e. out of 200 total observations; 46 are labeled as "Positive Class" |
| Total Observations | 200 | 200 | 200 | There are 200 total observations; i.e. sample size |
# ROC curve for the hold-out predictions.
evaluator.plot_roc_auc_curve().show()
<Figure size 720x444.984 with 0 Axes>
# Precision/recall curve plus threshold sweeps, then the gain/lift table
# (how concentrated the positives are in the top-scored percentiles).
evaluator.plot_precision_recall_auc_curve().show()
evaluator.plot_threshold_curves(score_threshold_range=(0.1, 0.7)).show()
evaluator.plot_precision_recall_tradeoff(score_threshold_range=(0.1, 0.6)).show()
evaluator.calculate_lift_gain(return_style=True)
| Gain | Lift | |
|---|---|---|
| Percentile | ||
| 5 | 0.11 | 2.17 |
| 10 | 0.24 | 2.39 |
| 15 | 0.33 | 2.17 |
| 20 | 0.43 | 2.17 |
| 25 | 0.48 | 1.91 |
| 30 | 0.61 | 2.03 |
| 35 | 0.72 | 2.05 |
| 40 | 0.78 | 1.96 |
| 45 | 0.83 | 1.84 |
| 50 | 0.85 | 1.70 |
| 55 | 0.89 | 1.62 |
| 60 | 0.89 | 1.49 |
| 65 | 0.91 | 1.40 |
| 70 | 0.93 | 1.34 |
| 75 | 0.93 | 1.25 |
| 80 | 0.93 | 1.17 |
| 85 | 0.98 | 1.15 |
| 90 | 1.00 | 1.11 |
| 95 | 1.00 | 1.05 |
| 100 | 1.00 | 1.00 |